/*
 * Copyright (C) 2019-2020 Yaong <yaongtime@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef VC4_PRIVATE_H
#define VC4_PRIVATE_H

#include <string.h>

#include "list.h"

#include "nir.h"
#include "compiler/shader_enums.h"
#include "compiler/spirv/nir_spirv.h"

#include "util/macros.h"
#include "vk_alloc.h"
#include "vk_object.h"

// #include <vulkan/vk_android_native_buffer.h>
#include <vulkan/vulkan_core.h>
#include <vulkan/vk_icd.h>
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_intel.h>

#include "vc4_extensions.h"
#include "vc4_entrypoints.h"

#include "wsi_common.h"

#include "vc4_vk_common.h"
#include "vc4_vk_cl.h"

#include "vc4_qir.h"
#include "vc4_vk_program.h"

#include "vc4_drm.h"

#define MAX_VIEWPORTS 1
#define MAX_SCISSORS  1

#define MAX_VBS PIPE_MAX_ATTRIBS
#define MAX_VERTEX_ATTRIBS 8

#define V3D_OUTPUT_IMAGE_FORMAT_NO 255
#define TEXTURE_DATA_FORMAT_NO     255

#define MAX_PUSH_CONSTANT_SIZE   (128)

/* queue types */
#define VC4_QUEUE_GENERAL 0

#define VC4_MAX_DRM_DEVICES (1)

#define VC4_MAX_QUEUE_FAMILIES (2)

#define MAX_SETS 16

#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
#define MAX_DYNAMIC_STORAGE_BUFFERS 8
#define MAX_DYNAMIC_BUFFERS                                                  \
   (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)

/* Copy `count` elements from `src` to `dest`, statically asserting that
 * both pointers reference same-sized element types (guards against
 * accidentally mismatched pointer types).
 *
 * Fix: `src`/`dest` are now parenthesized inside sizeof so expression
 * arguments (e.g. `p + i`) parse correctly.
 */
#define typed_memcpy(dest, src, count) ({				\
			STATIC_ASSERT(sizeof(*(src)) == sizeof(*(dest))); \
			memcpy((dest), (src), (count) * sizeof(*(src))); \
		})

struct vc4_instance;

VkResult
__vk_errorf(struct vc4_instance *instance,
            VkResult error,
            const char *file,
            int line,
            const char *format,
            ...);

/* Report a VkResult error with source location via __vk_errorf(); the
 * expansion evaluates to the VkResult so it can be used as an expression
 * (e.g. `return vk_error(...)`).
 *
 * Fix: removed the trailing semicolons from the expansions.  With them,
 * `vk_error(i, e);` produced a stray empty statement and broke use in
 * unbraced if/else bodies and in expression position.
 */
#define vk_error(instance, error) \
    __vk_errorf(instance, error, __FILE__, __LINE__, NULL)
#define vk_errorf(instance, error, format, ...) \
    __vk_errorf(instance, error, __FILE__, __LINE__, format, ##__VA_ARGS__)

/***********************************************************************************************/
#define DRM_VC4_MAX_PERF_COUNTERS	16

/* State for a kernel DRM HW performance monitor that can be attached to
 * submitted jobs (see vc4_vk_job::perfmon).
 */
struct vc4_hwperfmon {
        uint32_t id;         /* perfmon handle; presumably allocated by the kernel -- verify */
        uint64_t last_seqno; /* seqno of the last job this perfmon was used with */
        uint8_t events[DRM_VC4_MAX_PERF_COUNTERS];    /* selected HW event per counter slot */
        uint64_t counters[DRM_VC4_MAX_PERF_COUNTERS]; /* accumulated counter values */
};

/**
 * A complete bin/render job.
 *
 * This is all of the state necessary to submit a bin/render to the kernel.
 * We want to be able to have multiple in progress at a time, so that we don't
 * need to flush an existing CL just to switch to rendering to a new render
 * target (which would mean reading back from the old render target when
 * starting to render to it again).
 */
struct vc4_vk_job {

      /* Link in a job list; presumably vc4_cmd_buffer_state::render_jobs /
       * render_jobs_done -- verify against the .c side.
       */
      struct list_head list;

      struct vc4_cl bcl;         /* bin command list */
      struct vc4_cl shader_rec;  /* shader record stream */
      struct vc4_cl uniforms;    /* uniform data stream */
      struct vc4_cl bo_handles;  /* GEM handles referenced by this job */
      struct vc4_cl bo_pointers; /* BO pointers matching bo_handles */
      uint32_t shader_rec_count; /* number of shader records emitted */
      /**
      * Amount of memory used by the BOs in bo_pointers.
      *
      * Used for checking when we should flush the job early so we don't
      * OOM.
      */
      uint32_t bo_space;

      /* Last BO hindex referenced from VC4_PACKET_GEM_HANDLES. */
      uint32_t last_gem_handle_hindex;

      /** @{ Surfaces to submit rendering for. */
      struct vc4_image *color_read;
      struct vc4_image *color_write;
      struct vc4_image *zs_read;
      struct vc4_image *zs_write;
      struct vc4_image *msaa_color_write;
      struct vc4_image *msaa_zs_write;

      /** @} */
      /** @{
      * Bounding box of the scissor across all queued drawing.
      *
      * Note that the max values are exclusive.
      */
      uint32_t draw_min_x;
      uint32_t draw_min_y;
      uint32_t draw_max_x;
      uint32_t draw_max_y;
      /** @} */
      /** @{
      * Width/height of the color framebuffer being rendered to,
      * for VC4_TILE_RENDERING_MODE_CONFIG.
      */
      uint32_t draw_width;
      uint32_t draw_height;
      /** @} */
      /** @{ Tile information, depending on MSAA and float color buffer. */
      uint32_t draw_tiles_x; /** @< Number of tiles wide for framebuffer. */
      uint32_t draw_tiles_y; /** @< Number of tiles high for framebuffer. */

      uint32_t tile_width; /** @< Width of a tile. */
      uint32_t tile_height; /** @< Height of a tile. */
      /** Whether the current rendering is in a 4X MSAA tile buffer. */
      bool msaa;
      /** @} */
      /* Bitmask of PIPE_CLEAR_* of buffers that were cleared before the
      * first rendering.
      */
      uint32_t cleared;
      /* Bitmask of PIPE_CLEAR_* of buffers that have been rendered to
      * (either clears or draws).
      */
      uint32_t resolve;
      uint32_t clear_color[2];
      uint32_t clear_depth; /**< 24-bit unorm depth */
      uint8_t clear_stencil;

      /**
      * Set if some drawing (triangles, blits, or just a glClear()) has
      * been done to the FBO, meaning that we need to
      * DRM_IOCTL_VC4_SUBMIT_CL.
      */
      bool needs_flush;

      /**
      * Number of draw calls (not counting full buffer clears) queued in
      * the current job.
      */
      uint32_t draw_calls_queued;

      /** Any flags to be passed in drm_vc4_submit_cl.flags. */
      uint32_t flags;

      /* The submit ioctl arguments built up for this job. */
      struct drm_vc4_submit_cl submit;

      /* Performance monitor attached to this job. */
      struct vc4_hwperfmon *perfmon;

      /* Back-pointers to the owning device and command buffer. */
      struct vc4_device *device;
      struct vc4_cmd_buffer *cmd_buffer;

      // struct vc4_job_key key;
};

/* Per-GPU state: the DRM node fds, identity reported to the application,
 * and the WSI (window-system integration) hooks.
 */
struct vc4_physical_device
{
    VK_LOADER_DATA _loader_data;

    struct vc4_instance *instance; /* owning instance */

    char path[20]; /* DRM node path; presumably "/dev/dri/..." -- verify */
    char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE]; /* device name for VkPhysicalDeviceProperties */
    //    uint8_t driver_uuid[VK_UUID_SIZE];
    //    uint8_t device_uuid[VK_UUID_SIZE];
    uint8_t cache_uuid[VK_UUID_SIZE]; /* pipeline-cache UUID */

    struct wsi_device wsi_device;

    int local_fd;  /* render node fd */
    int master_fd; /* primary node fd; NOTE(review): -1 when absent? confirm */

    /* Monotonic id handed to shader programs, for shader-db style tracking. */
    uint32_t next_program_id;

    struct vc4_device_extension_table supported_extensions;
};

/* Driver-wide state backing a VkInstance: allocator, API version, and the
 * (statically sized) physical device list.
 */
struct vc4_instance
{
    struct vk_object_base base;

    VkAllocationCallbacks alloc; /* instance-level host allocator */

    uint32_t api_version;        /* Vulkan version requested at creation */
    int physical_device_count;   /* valid entries in physical_devices[] */

    struct vc4_physical_device physical_devices[VC4_MAX_DRM_DEVICES];

    struct vc4_instance_extension_table enabled_extensions;
};

/* State backing a VkQueue: family/index identification plus the
 * synchronization objects used when flushing jobs to the kernel.
 */
struct vc4_queue
{
    VK_LOADER_DATA _loader_data;

    struct vc4_device *device;   /* owning logical device */
    uint32_t queue_family_index; /* family this queue belongs to */
    int queue_idx;               /* index within the family */

    VkDeviceQueueCreateFlags flags;

    /** Handle of syncobj containing the last submitted job fence. */
    uint32_t job_syncobj;

    /** Handle of the syncobj that holds in_fence_fd for submission. */
    struct vc4_semaphore *sem;

    /** Seqno of the last CL flush's job. */
    uint64_t last_emit_seqno;
};

// struct vc4_pipeline_cache_stats {
//    uint32_t miss;
//    uint32_t hit;
//    uint32_t count;
// };

/* Backing object for VkPipelineCache.  Currently a stub: only the device
 * back-pointer is live; the actual NIR/variant caches are still disabled.
 */
struct vc4_pipeline_cache {
   VK_LOADER_DATA _loader_data;

   struct vc4_device *device; /* owning logical device */
   // mtx_t mutex;

   // struct hash_table *nir_cache;
   // struct vc4_pipeline_cache_stats nir_stats;

   // struct hash_table *variant_cache;
   // struct vc4_pipeline_cache_stats variant_stats;
};

/* State backing a VkDevice: the DRM fd, per-family queue arrays, kernel
 * capability flags, and job-completion tracking.
 */
struct vc4_device
{
   struct vk_device vk;

   // VkAllocationCallbacks alloc;

   int fd; /* DRM device fd used for ioctls */

   struct vc4_instance *instance;

   bool has_syncobj;       /* kernel supports DRM syncobjs */
   uint32_t last_job_sync; /* syncobj handle of the last submitted job -- TODO confirm */
   mtx_t mutex;            /* guards device-wide state; exact coverage defined in the .c side */

   /** The last seqno we've completed a wait for.
    *
    * This lets us slightly optimize our waits by skipping wait syscalls
    * if we know the job's already done.
    */
   uint64_t finished_seqno;

   struct vc4_queue *queues[VC4_MAX_QUEUE_FAMILIES]; /* queue array per family */
   int queue_count[VC4_MAX_QUEUE_FAMILIES];          /* queues per family */

   struct vc4_device_extension_table enabled_extensions;

   struct vc4_physical_device *physical_device;
   int _lost; /* nonzero once the device is lost (VK_ERROR_DEVICE_LOST) -- verify */

   /* HW capability flags. */
   bool has_control_flow;
   bool has_threaded_fs;
};

// struct vc4_bo
// {
//    uint32_t gem_handle;
//    uint32_t size;

//    volatile uint32_t last_hindex;

//    void *map;
// };

/* A GEM buffer object: kernel handle, size, and (optional) CPU mapping.
 * NOTE(review): the field name `private` makes this header unusable from
 * C++ translation units; rename if C++ consumers ever appear.
 */
struct vc4_bo {
      void *map;       /* CPU mapping, or NULL if not mapped */
      // const char *name;
      uint32_t handle; /* GEM handle */
      uint32_t size;   /* allocation size in bytes */

      /* This will be read/written by multiple threads without a lock -- you
      * should take a snapshot and use it to see if you happen to be in the
      * CL's handles at this position, to make most lookups O(1).  It's
      * volatile to make sure that the compiler doesn't emit multiple loads
      * from the address, which would make the lookup racy.
      */
      volatile uint32_t last_hindex;

      /** Entry in the linked list of buffers freed, by age. */
      // struct list_head time_list;
      /** Entry in the per-page-count linked list of buffers freed (by age). */
      // struct list_head size_list;
      /** Approximate second when the bo was freed. */
      // time_t free_time;
      /**
      * Whether only our process has a reference to the BO (meaning that
      * it's safe to reuse it in the BO cache).
      */
      bool private;
};

/* Command buffer lifecycle state, mirroring the Vulkan spec's
 * initial/recording/executable/pending states, plus an explicit invalid
 * zero value.
 */
enum vc4_cmd_buffer_status
{
   VC4_CMD_BUFFER_STATUS_INVALID = 0,
   VC4_CMD_BUFFER_STATUS_INITIAL,
   VC4_CMD_BUFFER_STATUS_RECORDING,
   VC4_CMD_BUFFER_STATUS_EXECUTABLE,
   VC4_CMD_BUFFER_STATUS_PENDING,
};

/* Flags for dirty pipeline state.
 */
/* Bits for vc4_cmd_buffer_state::dirty; set when the corresponding piece of
 * state changes so it gets re-emitted on the next draw.
 */
enum vc4_cmd_dirty_bits {
   VC4_CMD_DIRTY_VIEWPORT                  = 1 << 0,
   VC4_CMD_DIRTY_SCISSOR                   = 1 << 1,
   VC4_CMD_DIRTY_STENCIL_COMPARE_MASK      = 1 << 2,
   VC4_CMD_DIRTY_STENCIL_WRITE_MASK        = 1 << 3,
   VC4_CMD_DIRTY_STENCIL_REFERENCE         = 1 << 4,
   VC4_CMD_DIRTY_PIPELINE                  = 1 << 5,
   VC4_CMD_DIRTY_VERTEX_BUFFER             = 1 << 6,
   VC4_CMD_DIRTY_INDEX_BUFFER              = 1 << 7,
   VC4_CMD_DIRTY_DESCRIPTOR_SETS           = 1 << 8,
   VC4_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS   = 1 << 9,
   VC4_CMD_DIRTY_PUSH_CONSTANTS            = 1 << 10,
   VC4_CMD_DIRTY_BLEND_CONSTANTS           = 1 << 11,
   VC4_CMD_DIRTY_OCCLUSION_QUERY           = 1 << 12,
   VC4_CMD_DIRTY_DEPTH_BIAS                = 1 << 13,
   VC4_CMD_DIRTY_LINE_WIDTH                = 1 << 14,
};

/* Currently bound descriptor sets plus their dynamic offsets. */
struct vc4_descriptor_state {
   struct vc4_descriptor_set *descriptor_sets[MAX_SETS];
   uint32_t valid; /* bitmask of bound entries in descriptor_sets[] */
   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
};

/* Active viewports plus the scale/translate transform derived from each
 * (see vc4_viewport_compute_xform()).
 */
struct vc4_viewport_state {
   uint32_t count; /* number of valid viewports */
   VkViewport viewports[MAX_VIEWPORTS];
   float translate[MAX_VIEWPORTS][3]; /* per-viewport xform translate (x, y, z) */
   float scale[MAX_VIEWPORTS][3];     /* per-viewport xform scale (x, y, z) */
};

/* Active scissor rectangles. */
struct vc4_scissor_state {
   uint32_t count; /* number of valid scissors */
   VkRect2D scissors[MAX_SCISSORS];
};

/* Snapshot of all dynamic pipeline state; the subset actually treated as
 * dynamic is selected by `mask` (see vc4_dynamic_state_bits).
 */
struct vc4_dynamic_state {
   /**
    * Bitmask of (1 << VK_DYNAMIC_STATE_*).
    * Defines the set of saved dynamic state.
    */
   uint32_t mask;

   struct vc4_viewport_state viewport;

   struct vc4_scissor_state scissor;

   /* Front/back stencil compare masks. */
   struct {
      uint32_t front;
      uint32_t back;
   } stencil_compare_mask;

   /* Front/back stencil write masks. */
   struct {
      uint32_t front;
      uint32_t back;
   } stencil_write_mask;

   /* Front/back stencil reference values. */
   struct {
      uint32_t front;
      uint32_t back;
   } stencil_reference;

   float blend_constants[4]; /* RGBA blend constant color */

   struct {
      VkBool32 depthBiasEnable;
      float constant_factor;
      float slope_factor;
   } depth_bias;

   float line_width;
};

/* Per-attachment record of clears requested by the render pass that have
 * not been performed yet.
 */
struct vc4_attachment_state {
   VkImageAspectFlags pending_clear_aspects; /* aspects still needing a clear */
   VkClearValue clear_value;                 /* value to clear with */
};

// struct vc4_vertex_binding {
//    struct vc4_buffer *buffer;
//    VkDeviceSize offset;
// };

/* Mutable recording state of a command buffer: current render pass,
 * bound pipeline/descriptors/vertex buffers, dynamic state and dirty bits.
 */
struct vc4_cmd_buffer_state {
   struct vc4_render_pass *pass;        /* render pass being recorded, or NULL */
   struct vc4_framebuffer *framebuffer; /* framebuffer of the current pass */
   VkRect2D render_area;

   struct list_head render_jobs;      /* jobs pending submission */
   struct list_head render_jobs_done; /* jobs already flushed -- verify semantics */
   struct vc4_context *vc4;

   /* Index 0/1: presumably graphics vs. compute bind points -- confirm. */
   struct vc4_descriptor_state descriptor_state[2];

   uint32_t subpass_idx; /* current subpass within `pass` */

   struct vc4_pipeline *pipeline; /* currently bound pipeline */

   struct vc4_dynamic_state dynamic;
   uint32_t dirty; /* mask of VC4_CMD_DIRTY_* bits */

   /* Current clip window. We use this to check whether we have an active
    * scissor, since in that case we can't use TLB clears and need to fallback
    * to drawing rects.
    */
   VkRect2D clip_window;

   /* Per-attachment pending-clear state; allocated per render pass. */
   struct vc4_attachment_state *attachments;

   // struct vc4_vertex_binding vertex_bindings[MAX_VBS];
   struct vc4_vertexbuf_stateobj vertexbuf;
   /* Currently bound index buffer. */
   struct {
      VkBuffer buffer;
      VkDeviceSize offset;
      uint8_t index_size; /* bytes per index (2 or 4) -- TODO confirm */
   } index_buffer;
};

/* Backing object for VkCommandPool: allocator plus lists of live and
 * recycled command buffers.
 */
struct vc4_cmd_pool
{
   VkAllocationCallbacks alloc;
   struct list_head cmd_buffers;      /* allocated command buffers */
   struct list_head free_cmd_buffers; /* freed buffers kept for reuse */
   uint32_t queue_family_index;
};

/* Backing object for VkCommandBuffer. */
struct vc4_cmd_buffer
{
   struct vk_object_base base;

   struct vc4_device *device;

   struct vc4_cmd_pool *pool;
   struct list_head pool_link; /* entry in pool->cmd_buffers / free_cmd_buffers */

   VkCommandBufferUsageFlags usage_flags;
   VkCommandBufferLevel level; /* primary or secondary */

   enum vc4_cmd_buffer_status status; /* lifecycle state */

   struct vc4_cmd_buffer_state state; /* mutable recording state */
// uint32_t vertex_bindings_set;
   uint32_t queue_family_index;

   /* Raw push-constant storage written by vkCmdPushConstants. */
   uint32_t push_constants_data[MAX_PUSH_CONSTANT_SIZE];

   /* First error hit while recording; reported at vkEndCommandBuffer. */
   VkResult record_result;
};

/* Backing object for VkBuffer: creation parameters plus the BO binding
 * established by vkBindBufferMemory.
 */
struct vc4_buffer
{
   VkDeviceSize size;

   VkBufferUsageFlags usage;
   VkBufferCreateFlags flags;

   struct vc4_bo *bo;       /* bound BO, or NULL before binding */
   VkDeviceSize bo_offset;  /* offset of the buffer within the BO */
};

/* Backing object for VkDeviceMemory: a single BO allocation. */
struct vc4_device_memory
{
   struct vc4_bo bo;  /* the underlying GEM allocation */
   VkDeviceSize size; /* requested allocation size */

   /* for dedicated allocations */
//    struct vc4_image *image;
//    struct vc4_buffer *buffer;

   uint32_t type_index; /* memory type index from VkMemoryAllocateInfo */
//    void *map;
//    void *user_ptr;
};

#define VC4_MAX_MIP_LEVELS 12

/* Layout of one mip level within a vc4_image allocation. */
struct vc4_resource_slice {
   uint32_t offset; /* byte offset of this level from the image start */
   uint32_t stride; /* row stride in bytes */
   uint32_t size;   /* total bytes for this level */

   /** One of VC4_TILING_FORMAT_* */
   uint8_t tiling;
};

/* Per-format description: render-target type, texture type and the channel
 * swizzle needed to move data between shaders and the tile buffer.
 */
struct vc4_format {
        /** Set if the pipe format is defined in the table. */
        bool supported;

        /** Set to 0 if unsupported, 1 if RGBA8888, 2 if rgb565. */
        uint8_t rt_type;

        /** One of VC4_TEXTURE_TYPE_*. */
        uint8_t tex_type;

        /**
         * Swizzle to apply to the RGBA shader output for storing to the tile
         * buffer, to the RGBA tile buffer to produce shader input (for
         * blending), and for turning the rgba8888 texture sampler return
         * value into shader rgba values.
         */
        uint8_t swizzle[4];
};

/* Backing object for VkImage: creation parameters, computed HW layout
 * (per-level slices, tiling), and the memory binding.
 */
struct vc4_image {
   VkImageType type;
   VkImageAspectFlags aspects;

   VkExtent3D extent;     /* base level dimensions */
   uint32_t levels;       /* mip level count */
   uint32_t array_size;   /* array layer count */
   uint32_t samples;
   VkImageUsageFlags usage;
   VkImageCreateFlags create_flags;
   VkImageTiling tiling;

   VkFormat vk_format;
   // const struct vc4_format *format;

   uint32_t cpp; /* bytes per pixel */

   uint64_t drm_format_mod; /* DRM format modifier in use */
   bool tiled;              /* whether the image uses a tiled layout */

   struct vc4_resource_slice slices[VC4_MAX_MIP_LEVELS]; /* per-level layout */
   uint64_t size; /* Total size in bytes */
   uint32_t cube_map_stride; /* byte stride between cube faces -- TODO confirm */
   uint32_t alignment;

   struct vc4_device_memory *mem; /* bound memory, or NULL before binding */
   VkDeviceSize mem_offset;       /* offset within `mem` */
};

/* Backing object for VkImageView: the viewed sub-range of an image plus
 * precomputed format/swizzle info for the shader key.
 */
struct vc4_image_view {
   const struct vc4_image *image; /* viewed image */
   VkImageAspectFlags aspects;
   VkExtent3D extent; /* extent of the viewed level */
   VkImageViewType type;

   VkFormat vk_format;
   const struct vc4_format *format;
   bool swap_rb;           /* whether R and B channels are swapped */
   uint32_t internal_bpp;  /* internal bits-per-pixel class -- verify encoding */
   uint32_t internal_type; /* internal type class -- verify encoding */

   uint32_t base_level;  /* first mip level of the view */
   uint32_t max_level;   /* last mip level of the view */
   uint32_t first_layer;
   uint32_t last_layer;
   uint32_t offset;      /* byte offset into the image -- TODO confirm */

   /* Precomputed (composed from createinfo->components and format swizzle)
    * swizzles to pass in to the shader key.
    *
    * This could be also included on the descriptor bo, but the shader state
    * packet doesn't need it on a bo, so we can just avoid a memory copy
    */
   uint8_t swizzle[4];

   /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
    * during UpdateDescriptorSets.
    *
    * Empirical tests show that cube arrays need a different shader state
    * depending on whether they are used with a sampler or not, so for these
    * we generate two states and select the one to use based on the descriptor
    * type.
    */
   // uint8_t texture_shader_state[2][cl_packet_length(TEXTURE_SHADER_STATE)];
};

/* Backing object for VkSampler.  Currently just a copy of the create info;
 * the prepacked HW state is still disabled.
 */
struct vc4_sampler {

   VkSamplerCreateInfo create_info;
   /* Prepacked SAMPLER_STATE, that is referenced as part of the tmu
    * configuration. If needed it will be copied to the descriptor info during
    * UpdateDescriptorSets
    */
   // uint8_t sampler_state[cl_packet_length(SAMPLER_STATE)];
};

/* One descriptor slot.  The active union member is selected by `type`:
 * image/sampler descriptors use the first struct, (dynamic) buffer
 * descriptors the second, texel buffers the buffer_view pointer.
 */
struct vc4_descriptor {
   VkDescriptorType type;

   union {
      /* Sampled/storage image and (combined) sampler descriptors. */
      struct {
         struct vc4_image_view *image_view;
         struct vc4_sampler *sampler;
      };

      /* Uniform/storage buffer descriptors. */
      struct {
         struct vc4_buffer *buffer;
         uint32_t offset;
         uint32_t range;
      };

      struct vc4_buffer_view *buffer_view;
   };
};

#define VC4_DESC_SET_MAP_NUM  32

/* Flat mapping from a compiler-side resource index to its Vulkan
 * (set, binding, array index) coordinates; one entry per descriptor.
 */
struct vc4_descriptor_map {
   /* TODO: avoid fixed size array/justify the size */
   unsigned num_desc; /* Number of descriptors  */
   int set[VC4_DESC_SET_MAP_NUM];         /* descriptor set index */
   int binding[VC4_DESC_SET_MAP_NUM];     /* binding within the set */
   int array_index[VC4_DESC_SET_MAP_NUM]; /* element within the binding */
   int array_size[VC4_DESC_SET_MAP_NUM];  /* binding's array size */

   /* The following makes sense for textures, but this is the easier place to
    * put it
    */
   bool is_shadow[VC4_DESC_SET_MAP_NUM];
};

/* Layout of a single binding within a descriptor set layout. */
struct vc4_descriptor_set_binding_layout {
   VkDescriptorType type;

   /* Number of array elements in this binding */
   uint32_t array_size;

   /* Index into the flattened descriptor set */
   uint32_t descriptor_index;

   uint32_t dynamic_offset_count;
   uint32_t dynamic_offset_index;

   /* Offset into the descriptor set where this descriptor lives (final offset
    * on the descriptor bo need to take into account set->base_offset)
    */
   uint32_t descriptor_offset;

   /* Offset in the vc4_descriptor_set_layout of the immutable samplers, or 0
    * if there are no immutable samplers.
    */
   uint32_t immutable_samplers_offset;
};

/* Backing object for VkDescriptorSetLayout; the per-binding layouts follow
 * the struct in a flexible array member.
 *
 * Fix: `binding[0]` replaced with a C99 flexible array member `binding[]`;
 * indexing past a zero-length array is undefined behavior in standard C.
 */
struct vc4_descriptor_set_layout {
   VkDescriptorSetLayoutCreateFlags flags;

   /* Number of bindings in this descriptor set */
   uint32_t binding_count;

   /* Total bo size needed for this descriptor set
    */
   uint32_t bo_size;

   /* Shader stages affected by this descriptor set */
   uint16_t shader_stages;

   /* Number of descriptors in this descriptor set */
   uint32_t descriptor_count;

   /* Number of dynamic offsets used by this descriptor set */
   uint16_t dynamic_offset_count;

   /* Bindings in this descriptor set; allocate
    * sizeof(struct vc4_descriptor_set_layout) +
    * binding_count * sizeof(binding[0]) bytes.
    */
   struct vc4_descriptor_set_binding_layout binding[];
};

/* Record of one set allocated from a pool (its range within the pool BO). */
struct vc4_descriptor_pool_entry
{
   uint32_t offset; /* byte offset within the pool's BO */
   uint32_t size;   /* byte size of the set's storage */
   struct vc4_descriptor_set *set;
};

/* Backing object for VkDescriptorPool: a BO plus bump-allocated host
 * storage; allocation records follow in a flexible array member.
 *
 * Fix: `entries[0]` replaced with a C99 flexible array member `entries[]`;
 * indexing past a zero-length array is undefined behavior in standard C.
 */
struct vc4_descriptor_pool
{
   struct vk_object_base base;

   struct vc4_bo bo;        /* GPU-visible descriptor storage */
   uint64_t current_offset; /* bump-allocator offset into the BO */
   uint64_t size;

   /* Host memory bump allocator (NULL when host allocation is per-set). */
   uint8_t *host_memory_base;
   uint8_t *host_memory_ptr;
   uint8_t *host_memory_end;

   uint32_t entry_count;
   uint32_t max_entry_count;
   struct vc4_descriptor_pool_entry entries[];
};

/* Backing object for VkDescriptorSet; the descriptors follow the struct in
 * a flexible array member.
 *
 * Fix: `descriptors[0]` replaced with a C99 flexible array member
 * `descriptors[]`; indexing past a zero-length array is undefined behavior
 * in standard C.
 */
struct vc4_descriptor_set
{
   struct vk_object_base base;

   const struct vc4_descriptor_set_layout *layout;
   struct vc4_descriptor_pool *pool; /* pool this set was allocated from */
   uint32_t size;

   // uint64_t va;
   uint32_t *mapped_ptr; /* CPU pointer to this set's storage in the pool BO */

   uint32_t *dynamic_descriptors;

   /* The descriptors below can be indexed (set/binding) using the set_layout
    */
   struct vc4_descriptor descriptors[];
};

/* Backing object for VkPipelineLayout: the stack of set layouts plus
 * push-constant sizing.
 */
struct vc4_pipeline_layout {
   struct {
      struct vc4_descriptor_set_layout *layout;
      uint32_t dynamic_offset_start; /* first dynamic offset index for this set */
   } set[MAX_SETS];

   uint32_t num_sets;
   uint32_t dynamic_offset_count; /* total dynamic offsets across all sets */

   uint32_t push_constant_size; /* bytes of push constants declared */
};

/* Reference from a subpass to a render-pass attachment. */
struct vc4_subpass_attachment {
   uint32_t attachment;  /* index into vc4_render_pass::attachments, or VK_ATTACHMENT_UNUSED */
   VkImageLayout layout; /* layout the attachment is in during the subpass */
};

/* Backing object for VkSemaphore, implemented on a DRM syncobj. */
struct vc4_semaphore {
   /* A syncobject handle associated with this semaphore */
   uint32_t sync;

   /* The file handle of a fence that we imported into our syncobject */
   int32_t fd;
};

/* Backing object for VkFence, implemented on a DRM syncobj (same shape as
 * vc4_semaphore).
 */
struct vc4_fence {
   /* A syncobject handle associated with this fence */
   uint32_t sync;

   /* The file handle of a fence that we imported into our syncobject */
   int32_t fd;
};

/* Backing object for VkEvent: a host-side signaled/unsignaled flag. */
struct vc4_event {
   int state; /* nonzero when signaled -- TODO confirm encoding */
};

/* Backing object for VkShaderModule: either an internally generated NIR
 * shader, or SPIR-V words stored inline after the struct.
 *
 * Fix: `data[0]` replaced with a C99 flexible array member `data[]`;
 * indexing past a zero-length array is undefined behavior in standard C.
 */
struct vc4_shader_module {
   /* A NIR shader. We create NIR modules for shaders that are generated
    * internally by the driver.
    */
   struct nir_shader *nir;

   /* A SPIR-V shader */
   unsigned char sha1[20]; /* SHA-1 of the SPIR-V, for cache keys */
   uint32_t size;          /* byte size of data[] */
   char data[];            /* inline SPIR-V words; allocate sizeof(*m) + size */
};

/* FIXME: the same function at anv, radv and tu, perhaps create common
 * place?
 */
/* Convert a single-bit VkShaderStageFlagBits value to the corresponding
 * Mesa gl_shader_stage (the index of the set bit).
 *
 * Fix: use __builtin_ctz() instead of ffs(); ffs() is POSIX and requires
 * <strings.h>, which this header does not include directly.  For a value
 * with exactly one bit set (asserted below), ffs(x) - 1 == __builtin_ctz(x).
 */
static inline gl_shader_stage
vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
{
   /* Exactly one stage bit must be set; the mapping is meaningless otherwise. */
   assert(__builtin_popcount(vk_stage) == 1);
   return (gl_shader_stage)__builtin_ctz(vk_stage);
}

/*
 * Per-stage info for each stage, useful so shader_module_compile_to_nir and
 * other methods don't need so many parameters.
 *
 * FIXME: for the case of the coordinate shader and the vertex shader, module,
 * entrypoint, spec_info and nir are the same. There is also info only
 * relevant to some stages. But it seemed too much of a hassle to create a new
 * struct only to handle that. Revisit if this kind of info starts to grow.
 */
struct vc4_pipeline_stage {
   struct vc4_pipeline *pipeline; /* owning pipeline */

   gl_shader_stage stage;
   /* FIXME: is_coord only make sense if stage == MESA_SHADER_VERTEX. Perhaps
    * a stage base/vs/fs as keys and prog_data?
    */
   bool is_coord;

   /* Inputs captured from VkPipelineShaderStageCreateInfo. */
   const struct vc4_shader_module *module;
   const char *entrypoint;
   const VkSpecializationInfo *spec_info;

   nir_shader *nir; /* compiled NIR for this stage */

   /* The following is the combined hash of module+entrypoint+spec_info+nir */
   unsigned char shader_sha1[20];

   /** A name for this program, so you can track it in shader-db output. */
   uint32_t program_id;
   /** How many variants of this program were compiled, for shader-db. */
   uint32_t compiled_variant_count;

   /* The following are the default v3d_key populated using
    * VkCreateGraphicsPipelineCreateInfo. Variants will be created tweaking
    * them, so we don't need to maintain a copy of that create info struct
    * around
    */
   // union {
   //    struct v3d_key base;
   //    struct v3d_vs_key vs;
   //    struct v3d_fs_key fs;
   // } key;

   // struct vc4_shader_variant*current_variant;

   /* FIXME: only make sense on vs, so perhaps a v3dv key like radv? or a kind
    * of pipe_draw_info
    */
   enum pipe_prim_type topology;
};

/* Backing object for VkFramebuffer; the attachment views follow the struct
 * in a flexible array member.
 *
 * Fix: `attachments[0]` replaced with a C99 flexible array member
 * `attachments[]`; indexing past a zero-length array is undefined behavior
 * in standard C.
 */
struct vc4_framebuffer {
   uint32_t width;
   uint32_t height;
   uint32_t layers;

   uint32_t attachment_count;       /* valid entries in attachments[] */
   uint32_t color_attachment_count; /* how many of them are color attachments */
   struct vc4_image_view *attachments[];
};

/* One subpass of a render pass: its input/color/resolve/depth-stencil
 * attachment references (pointers into
 * vc4_render_pass::subpass_attachments).
 */
struct vc4_subpass {
   uint32_t input_count;
   struct vc4_subpass_attachment *input_attachments;

   uint32_t color_count;
   struct vc4_subpass_attachment *color_attachments;
   struct vc4_subpass_attachment *resolve_attachments;

   struct vc4_subpass_attachment ds_attachment;

   bool has_srgb_rt; /* any color attachment uses an sRGB format -- TODO confirm */
};

/* One attachment of a render pass, plus the subpass range that uses it. */
struct vc4_render_pass_attachment {
   VkAttachmentDescription desc;
   uint32_t first_subpass; /* first subpass referencing this attachment */
   uint32_t last_subpass;  /* last subpass referencing this attachment */
};

/* Backing object for VkRenderPass. */
struct vc4_render_pass
{
   struct vk_object_base base;

   uint32_t attachment_count;
   struct vc4_render_pass_attachment *attachments;

   uint32_t subpass_count;
   struct vc4_subpass *subpasses;

   /* Flat storage backing each subpass's attachment reference arrays. */
   struct vc4_subpass_attachment *subpass_attachments;
};

void
vc4_viewport_compute_xform(const VkViewport *viewport,
                            float scale[3],
                            float translate[3]);

/* Mostly a v3dv mapping of VkDynamicState, used to track which state is
 * defined as dynamic
 */
/* Bits for vc4_dynamic_state::mask, one per supported VkDynamicState. */
enum vc4_dynamic_state_bits {
   VC4_DYNAMIC_VIEWPORT                  = 1 << 0,
   VC4_DYNAMIC_SCISSOR                   = 1 << 1,
   VC4_DYNAMIC_STENCIL_COMPARE_MASK      = 1 << 2,
   VC4_DYNAMIC_STENCIL_WRITE_MASK        = 1 << 3,
   VC4_DYNAMIC_STENCIL_REFERENCE         = 1 << 4,
   VC4_DYNAMIC_BLEND_CONSTANTS           = 1 << 5,
   VC4_DYNAMIC_DEPTH_BIAS                = 1 << 6,
   VC4_DYNAMIC_LINE_WIDTH                = 1 << 7,
   VC4_DYNAMIC_ALL                       = (1 << 8) - 1, /* union of all bits above */
};

/* Backing object for VkPipeline: the per-stage shader state, baked
 * rasterization/blend/depth-stencil state, and vertex input layout.
 */
struct vc4_pipeline {
   struct vc4_device *device;

   VkShaderStageFlags active_stages; /* mask of stages present in this pipeline */

   struct vc4_render_pass *pass; /* render pass this pipeline was created against */
   struct vc4_subpass *subpass;

   /* Note: We can't use just a MESA_SHADER_STAGES array as we need to track
    * too the coordinate shader
    */
   struct vc4_pipeline_stage *vs;
   struct vc4_pipeline_stage *vs_bin; /* coordinate-shader variant of vs */
   struct vc4_pipeline_stage *fs;
   struct vc4_pipeline_stage *cs;

   // /* Spilling memory requirements */
   // struct {
   //    struct vc4_bo *bo;
   //    uint32_t size_per_thread;
   // } spill;

   /* Snapshot of the state baked at pipeline creation; `mask` selects which
    * parts the application declared dynamic instead.
    */
   struct vc4_dynamic_state dynamic_state;

   VkCullModeFlags   cullMode;
   VkFrontFace       frontFace;
   VkSampleCountFlagBits   rasterizationSamples;

   struct vc4_pipeline_layout *layout;

   // enum vc4_ez_state ez_state;

   // bool msaa;
   // bool sample_rate_shading;
   // uint32_t sample_mask;

   bool primitive_restart;
   VkPrimitiveTopology topology;

   /* Accessed by binding. So vb[binding]->stride is the stride of the vertex
    * array with such binding
    */
   struct vc4_pipeline_vertex_binding {
      uint32_t stride;
      uint32_t instance_divisor;
   } vb[MAX_VBS];
   uint32_t vb_count; /* valid entries in vb[] */

   /* Note that a lot of info from VkVertexInputAttributeDescription is
    * already prepacked, so here we are only storing those that need recheck
    * later. The array must be indexed by driver location, since that is the
    * order in which we need to emit the attributes.
    */
   // struct vc4_pipeline_vertex_attrib {
   //    uint32_t binding;
   //    uint32_t offset;
   //    VkFormat vk_format;
   // } va[MAX_VERTEX_ATTRIBS];

   struct vc4_vertex_stateobj vtx;
   // uint32_t va_count;

   /* Compiler-side resource index -> (set, binding) maps per resource kind. */
   struct vc4_descriptor_map ubo_map;
   struct vc4_descriptor_map ssbo_map;

   struct vc4_descriptor_map sampler_map;
   struct vc4_descriptor_map texture_map;

   // /*
   //  * Vulkan has separate texture and sampler objects. Previous sampler and
   //  * texture map uses a sampler and texture index respectively, that can be
   //  * different. But OpenGL combine both (or in other words, they are the
   //  * same). The v3d compiler and all the nir lowerings that they use were
   //  * written under that assumption. In order to not update all those, we
   //  * combine the indexes, and we use the following maps to get one or the
   //  * other. In general the driver side uses the tex/sampler indexes to gather
   //  * resources, and the compiler side uses the combined index (so the v3d key
   //  * texture info will be indexed using the combined index).
   //  */
   // struct hash_table *combined_index_map;
   // uint32_t combined_index_to_key_map[32];
   // uint32_t next_combined_index;

   // /* FIXME: this bo is another candidate to data to be uploaded using a
   //  * resource manager, instead of a individual bo
   //  */
   // struct vc4_bo *default_attribute_values;

   // struct vpm_config vpm_cfg;
   // struct vpm_config vpm_cfg_bin;

   // /* If the pipeline should emit any of the stencil configuration packets */
   // bool emit_stencil_cfg[2];

   // /* If the pipeline is using push constants */
   // bool use_push_constants;

   // /* Blend state */
   // struct {
   //    /* Per-RT bit mask with blend enables */
   //    uint8_t enables;
   //    /* Per-RT prepacked blend config packets */
   //    uint8_t cfg[V3D_MAX_DRAW_BUFFERS][cl_packet_length(BLEND_CFG)];
   //    /* Flag indicating whether the blend factors in use require
   //     * color constants.
   //     */
   //    bool needs_color_constants;
   //    /* Mask with enabled color channels for each RT (4 bits per RT) */
   //    uint32_t color_write_masks;
   // } blend;

   /* Gallium-style baked state reused from the GL driver. */
   struct pipe_blend_state blend_state;
   struct pipe_blend_color blend_color;

   struct pipe_depth_stencil_alpha_state ds_state;
   struct pipe_stencil_ref stencil_ref;

   // /* Depth bias */
   // struct {
   //    bool enabled;
   //    bool is_z16;
   // } depth_bias;

};

enum pipe_texture_target
vc4_vk_type_to_pipe_type(VkImageType type);

/* Generate inline casts between a driver struct and a *dispatchable* Vulkan
 * handle (VkDevice, VkQueue, ... are pointer-sized, so a plain pointer cast
 * suffices).
 */
#define VC4_DEFINE_HANDLE_CASTS(__vc4_type, __VkType)                            \
                                                                                \
    static inline struct __vc4_type *__vc4_type##_from_handle(__VkType _handle) \
    {                                                                           \
        return (struct __vc4_type *)_handle;                                    \
    }                                                                           \
                                                                                \
    static inline __VkType __vc4_type##_to_handle(struct __vc4_type *_obj)      \
    {                                                                           \
        return (__VkType)_obj;                                                  \
    }

/* Generate inline casts between a driver struct and a *non-dispatchable*
 * Vulkan handle (a 64-bit integer type on some ABIs, hence the round-trip
 * through uintptr_t).
 */
#define VC4_DEFINE_NONDISP_HANDLE_CASTS(__vc4_type, __VkType)                  \
                                                                             \
   static inline struct __vc4_type *__vc4_type##_from_handle(__VkType _handle) \
   {                                                                         \
      return (struct __vc4_type *) (uintptr_t) _handle;                       \
   }                                                                         \
                                                                             \
   static inline __VkType __vc4_type##_to_handle(struct __vc4_type *_obj)      \
   {                                                                         \
      return (__VkType)(uintptr_t) _obj;                                     \
   }

/* Declare `__name` as the driver struct behind Vulkan handle `__handle`. */
#define VC4_FROM_HANDLE(__vc4_type, __name, __handle) \
    struct __vc4_type *__name = __vc4_type##_from_handle(__handle)

/* Dispatchable handle casts. */
VC4_DEFINE_HANDLE_CASTS(vc4_device, VkDevice)
VC4_DEFINE_HANDLE_CASTS(vc4_instance, VkInstance)
VC4_DEFINE_HANDLE_CASTS(vc4_physical_device, VkPhysicalDevice)
VC4_DEFINE_HANDLE_CASTS(vc4_queue, VkQueue)
VC4_DEFINE_HANDLE_CASTS(vc4_cmd_buffer, VkCommandBuffer)

/* Non-dispatchable handle casts. */
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_cmd_pool, VkCommandPool)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_buffer, VkBuffer)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_image, VkImage)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_image_view, VkImageView)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_framebuffer, VkFramebuffer)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_device_memory, VkDeviceMemory)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_render_pass, VkRenderPass)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_shader_module, VkShaderModule)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_sampler, VkSampler)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_descriptor_set_layout, VkDescriptorSetLayout)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_descriptor_set, VkDescriptorSet)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_descriptor_pool, VkDescriptorPool)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_pipeline_layout, VkPipelineLayout)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_fence, VkFence)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_event, VkEvent)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_semaphore, VkSemaphore)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_pipeline, VkPipeline)
VC4_DEFINE_NONDISP_HANDLE_CASTS(vc4_pipeline_cache, VkPipelineCache)

/* This is defined as a macro so that it works for both
 * VkImageSubresourceRange and VkImageSubresourceLayers
 */
/* Resolved layer count of a subresource range, expanding
 * VK_REMAINING_ARRAY_LAYERS against the image's array size.
 */
#define vc4_layer_count(_image, _range) \
   ((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \
    (_image)->array_size - (_range)->baseArrayLayer : (_range)->layerCount)

/* Resolved mip level count of a subresource range, expanding
 * VK_REMAINING_MIP_LEVELS against the image's level count.
 */
#define vc4_level_count(_image, _range) \
   ((_range)->levelCount == VK_REMAINING_MIP_LEVELS ? \
    (_image)->levels - (_range)->baseMipLevel : (_range)->levelCount)

void *
vc4_lookup_entrypoint_unchecked(const char *name);

void *
vc4_lookup_entrypoint_checked(const char *name,
                              uint32_t core_version,
                              const struct vc4_instance_extension_table *instance,
                              const struct vc4_device_extension_table *device);

uint32_t
vc4_physical_device_api_version(struct vc4_physical_device *device);

VkResult
vc4_wsi_init(struct vc4_physical_device *physical_device);
void
vc4_wsi_finish(struct vc4_physical_device *physical_device);

uint32_t
vc4_layer_offset(const struct vc4_image *image, uint32_t level, uint32_t layer);

#endif